/*
Content: Figure 3a and 3b
Paper: "Immigrants Return Intentions and Labor Market Behavior when the Home Country is Unsafe"
Authors: Jacopo Bassetto, Teresa Freitas-Monteiro
*/


* Seed the random-number generator once, before the first repetition, so that
* the 300 placebo draws can be reproduced run after run. The seed has to stay
* outside the loop: re-seeding inside it would give every repetition the same draw.
* NOTE(review): assumes shufflevar draws from Stata's RNG - confirm.
set seed 20230815

* One pass of this loop = one placebo draw (rep is also used to name the output files).
forvalues rep = 1/300 {

* START WITH RAW SURVEY DATA (E.G. NO EVENTS MERGED)
use "${final}/rawdata_soep.dta", clear
drop if pid==.
drop if corigin==.
rename pmonin imonth

* Interview date: a missing day-of-month is set to 15.
* hlk0059 and ptagin are the two variables passed to mdy() as the day below.
foreach var in hlk0059 ptagin {
replace `var' = 15 if `var' == .
}

* Candidate interview dates from the available month / day variables.
* mdy() returns missing when a component is missing or invalid, so the
* candidates are tried in order until one is non-missing.
gen startdate = mdy(imonth,hlk0059,syear)
gen startdate2 =  mdy(imonth,ptagin,syear)
gen startdate3 =  mdy(hlk0060,hlk0059,syear)
gen startdate4 = mdy(pgmonth,hlk0059,syear)
gen startdate5 =  mdy(pgmonth,ptagin,syear)


replace startdate = startdate2 if startdate == .
replace startdate = startdate3 if startdate == .
replace startdate = startdate4 if startdate == .
replace startdate = startdate5 if startdate == .
	
* Month-year of interview built from survey year and interview month
g idate =ym(syear, imonth)
* Observations without any usable interview date cannot be matched to events
drop if startdate==.

* Country of origin: work on a copy of corigin so the raw variable stays untouched

capture drop soep_corigin_id
clonevar soep_corigin_id = corigin

	* Remap selected SOEP origin codes onto the ids used in soep_corigin_id.
	* The country named on each line is the one given for the source code.
	* No target code is itself a source code, so the rules cannot chain.
	* Not remapped: 145 Mongolia, 129 Samoa (missing).
	recode soep_corigin_id ///
		(128 = 104) /// 128 Malaysia
		(155 = 91)  /// 155 Turkmenistan
		(7   = 1)   /// 7 Germany
		(164 = 18)  /// 164 USA
		(168 = 106) //  168 Montenegro

* Reduce dataset to key variables (one single drop, patterns in the original order)
drop pg* plb* hlk* plj* plh* bire* bii* pla* pab* plc* pld* plf* ple* plg* ///
	pli* plm* p_k* p_n* p_buh* hlc* hlf* hlf* hli* hlj* hac* hld* hlh*  hle* ///
	pag* pka* plk* p_is* p_a* living* f*8 m*8 f*92 m*92 pdaue* pinta* p_f* p_d* ic*
	
*******************************************************
***  MERGE SOEP WITH MONTHLY EVENT DATA *************
******************************************************

* Matching key: month-year of the interview date, to be matched with the
* month-year of the event. Year and month are only needed to build the key,
* so they are computed inline instead of being stored and dropped again.
generate startmy = ym(year(startdate), month(startdate))
format startmy %tm

drop if syear == . // ATTENTION: dropped when there is no survey year
capture drop obs

* GENERATE PLACEBO DATES BY RESHUFFLING THE TIME OF EVENT VARIABLE
* (the SOEP data in memory is set aside here and brought back by the later restore)

preserve

		use "$globalterror/final/terror_monthlydata_soep_final.dta", clear

		* Build a country x month skeleton: one row per country, expanded to
		* every month from Jan 1970 to Dec 2018.
		tabulate year
		drop if startmy==.
		keep soep_corigin_id
		bysort soep_corigin_id: keep if _n == 1

		expand 588 // (2018-1970+1)*12 = 588 months per country

		* 120 is Jan 1970 in Stata's month-year numbering; the k-th copy of a
		* country gets 120 + (k - 1), i.e. Jan 1970 ... Dec 2018.
		bysort soep_corigin_id: generate startmy = 120 + (_n - 1)
		format startmy %tm

		* Attach the observed monthly terror data to the skeleton.
		* _merge_terror == 1 marks skeleton months with no row in the event file.
		merge 1:1 startmy soep_corigin_id using "$globalterror/final/terror_monthlydata_soep_final.dta", gen(_merge_terror) keepusing(soep_corigin_id startmy terror  year month  )

		* NOTE(review): drops rows whose year is 1975.5 or 1977.5; presumably
		* placeholder values in the event file - confirm.
		drop if inlist(year, 1975.5, 1977.5)

		***** SHUFFLE DATE HERE *************

		* Country x year identifier; only needed for the alternative shuffle below.
		egen group = group(soep_corigin_id year)

		* shufflevar writes the permuted dates to startmy_shuffled.
		* NOTE(review): exact meaning of cluster() should be checked against the shufflevar help file.
		shufflevar startmy, cluster(soep_corigin_id)  // alternative: reshuffle terror, cluster(group)) which varies terror events within survey year and country

		* From here on startmy is the placebo (shuffled) date.
		rename (startmy startmy_shuffled) (startmy_orig startmy)

		keep startmy soep_corigin_id terror _merge_terror // note: change/add here variables for placebo

		* Calendar year and month of the placebo date
		generate year = year(dofm(startmy))
		generate month = month(dofm(startmy))

		sort soep_corigin_id startmy year month
		drop if _merge_terror == 2

		* If in a given month there is no event assign a zero
		replace terror = 0 if _merge_terror == 1 // months with no terror

		*******************************************
		*** RELEVANT AND ISOLATED TERROR EVENTS ***
		*******************************************

		* Yearly totals
		foreach var in   terror   {
		bys year soep_corigin_id: egen `var'_y = total(`var') 
		}

		* RELEVANCE: difference with respect to the average of the yearly means
		* observed 12, 24 and 36 rows (months) earlier within the country
		foreach var in  terror     {

		* Generate yearly averages (not total)
		bys year soep_corigin_id: egen `var'_ya = mean(`var')

		* Generate difference with respect to the yearly average of the previous three years.
		* The lags are row offsets within country, so the first 36 rows of each
		* country have no complete history and the result is missing there.
		bys soep_corigin_id (startmy): gen `var'_m3y = (`var' - ((`var'_ya[_n-12] + `var'_ya[_n-24] + `var'_ya[_n-36])/3))

		}

		* Any terror event in the month
		g event_terror=terror>=1  & terror!=.


		* There is at least 1 relevant terror event.
		* Stata treats a missing value as larger than any number, so a bare
		* ">= 1" would flag every month with a missing deviation as an event.
		* The "< ." guard sets the flag to 0 there, as is done for event_terror.
		foreach event in  terror  {	
		foreach var in	 m3y {
		g event`event'_`var'=`event'_`var'>=1 & `event'_`var'<.
		}
		}	

		* ISOLATE: To use multiple events in one year, we need to isolate those events around time bands
		bysort soep_corigin_id year: generate obs = _n
		capture drop months_year
		* Running month counter within country, used as the key of the rolling windows
		bysort soep_corigin_id (startmy): generate months_year = _n

		* Rolling count of relevant events over the current month plus the
		* previous 1, 2 and 3 months (suffixes p30, p60, p90).
		* The count is blanked for the first w months of 1987.
		capture drop x
		forvalues w = 1/3 {
			local p = 30 * `w'
			rangestat (sum) eventterror_m3y_p`p' = eventterror_m3y, interval(months_year, -`w', 0) by(soep_corigin_id)
			replace eventterror_m3y_p`p' = . if year == 1987 & inrange(month, 1, `w')
		}

		* Identify "isolated" events, i.e. months whose window holds at most one event.
		* A missing count compares as larger than 1, so it yields 0 here.
		foreach p in 30 60 90 {
			generate keep_eterror_m3y_p`p' = eventterror_m3y_p`p' <= 1
		}

		* Identify the date of these isolated events
		foreach p in 30 60 90 {
			generate date_eterror_m3y_p`p' = startmy if eventterror_m3y == 1 & keep_eterror_m3y_p`p' == 1
		}
		format date_e*_p*0 %tm



		* Allocates the date of isolated events to the pre- and post-reference bands:
		* every month within +/- w months of an isolated event receives the
		* (earliest) event date in relevant*.
		forvalues w = 1/3 {
			local p = 30 * `w'
			rangestat (min) relevantterror_m3y_p`p' = date_eterror_m3y_p`p' if keep_eterror_m3y_p`p' == 1, interval(startmy, -`w', `w') by(soep_corigin_id)
		}
		format relevant*_p*0 %tm

		* Creates the distance in months from the isolated event
		foreach p in 30 60 90 {
			generate distanceterror_m3y_p`p' = startmy - relevantterror_m3y_p`p' if relevantterror_m3y_p`p' != .
		}

		* Make sure that for every isolated event there is a control and treatment group
		* within the corresponding bandwidth (this is to be consistent with the IEB):
		* the k-th row before / after the event row must sit at distance -k / +k
		* for every k up to the bandwidth.
		* NOTE(review): the [_n-k] / [_n+k] subscripts are not run under by:, so
		* they rely on the data being ordered by country and month at this point.
		forvalues w = 1/3 {
			local p = 30 * `w'
			local cond "date_eterror_m3y_p`p' != ."
			forvalues k = 1/`w' {
				local cond "`cond' & distanceterror_m3y_p`p'[_n-`k'] == -`k' & distanceterror_m3y_p`p'[_n+`k'] == `k'"
			}
			generate selectterror_m3y_p`p' = 1 if `cond'
		}

		* Discard event dates that do not have the full window on both sides
		foreach p in 30 60 90 {
			replace date_eterror_m3y_p`p' = . if selectterror_m3y_p`p' != 1
		}

		* Second pass: rebuild the reference date from the retained events only
		capture drop relevantterror_m3y_p30 relevantterror_m3y_p60 relevantterror_m3y_p90
		forvalues w = 1/3 {
			local p = 30 * `w'
			rangestat (min) relevantterror_m3y_p`p' = date_eterror_m3y_p`p' if keep_eterror_m3y_p`p' == 1, interval(startmy, -`w', `w') by(soep_corigin_id)
		}
		format relevant*_p*0 %tm

		* Create the distance in months from the isolated event
		capture drop distanceterror_m3y_p30 distanceterror_m3y_p60 distanceterror_m3y_p90
		foreach p in 30 60 90 {
			generate distanceterror_m3y_p`p' = startmy - relevantterror_m3y_p`p' if relevantterror_m3y_p`p' != .
		}

		* Park the placebo event panel in a temporary file, then bring the SOEP data back
		tempfile terror_reshuffled

		capture drop startdate
		save `terror_reshuffled'

restore

********************************************************************************
*MERGE PLACEBO EVENTS TO SOEP
********************************************************************************

* Many interviews per country-month on the SOEP side, one row per country-month
* on the event side; event months without any interview are discarded.
fmerge m:1 soep_corigin_id startmy  using `terror_reshuffled', gen(eventmerge_m)
keep if eventmerge_m != 2


* Matching date - Month-Year of interview with Month-Year of Event.
* year, month and startmy are rebuilt from the interview date.
foreach v in year month startmy {
	capture drop `v'
}

generate year = year(startdate)
generate month = month(startdate)
generate startmy = ym(year, month)
format startmy %tm
format startdate %td


			
*Pre-post event (90-day band): 0 up to and including the event month, 1 afterwards,
* missing when the interview is not linked to any isolated event
capture drop postterror_m3y_p90
generate postterror_m3y_p90 = 0 if distanceterror_m3y_p90 <= 0
replace postterror_m3y_p90 = 1 if distanceterror_m3y_p90 >= 1 & distanceterror_m3y_p90 != .

* Value and variable labels for the pre-post indicator
label define postterror_m3y_p90 0 "Pre-Terror" 1 "Post-Terror", replace
label values postterror_m3y_p90 postterror_m3y_p90
label variable postterror_m3y_p90 "Terror"



* Generate clusters: one id per distinct reference event date, for each bandwidth
foreach x in 30 60 90 {
	egen clusterterror_m3y_p`x' = group(relevantterror_m3y_p`x')
}

*********************************************************************************
* PLACEBO ESTIMATES *
*********************************************************************************
local outcome remain_ger_per

* Shift the distance (-3..3) to non-negative values (0..6) so it can be used as a
* factor variable; level 2 (distance -1) is the omitted base in the event study.
generate posdistterror_m3y_p90 = distanceterror_m3y_p90 + 3

* We dont use the month when the event occurred
drop if distanceterror_m3y_p90 == 0

* Estimation sample and absorbed fixed effects, shared by both regressions
global select_reg_M "!inlist(soep_corigin_id,.,1) & inrange(distanceterror_m3y_p90,-3,3)"
global controls "i.soep_corigin_id#i.syear i.syear#i.month i.bula"

foreach out of varlist `outcome' {

	local l`out': variable label `out'  			// save the variable label in a local

	* EVENT STUDY COEFFICIENTS: one coefficient per month relative to the event
	reghdfe `out' ib2.posdistterror_m3y_p90 if $select_reg_M & inrange(syear, 2000, 2018) , vce(cluster clusterterror_m3y_p90) absorb($controls)
	* parmest writes the estimates of this repetition into the working directory
	cd "$final/placebo"
	parmest, idnum(`rep') idstr("`out'") saving(`"`out'_`rep'_plot"', replace)

	* PRE-POST COEFFICIENT: single post-event indicator
	reghdfe `out' i.postterror_m3y_p90 if $select_reg_M & inrange(syear, 2000, 2018) , vce(cluster clusterterror_m3y_p90) absorb($controls)
	cd "$final/placebo"
	parmest, idnum(`rep') idstr("`out'") saving(`"`out'_`rep'_post"', replace)

}

}	

		
		
	* No halt statement here: "stop" is not a Stata command and would abort the
	* do-file with an error before the estimates are pooled and the figures drawn.

** PULL TOGETHER ALL ESTIMATES FROM THE 300 RANDOM DRAWS
* Files are addressed with their full path under $final/placebo, so this part
* does not depend on the working directory left behind by the loop.

	preserve

		foreach out in remain_ger_per {
			* plot = event-study coefficients, post = pre-post coefficient
			foreach kind in plot post {

				* start from an empty dataset and stack the 300 result files
				clear

				forvalues rep = 1/300 {
					append using "$final/placebo/`out'_`rep'_`kind'"
				}

				save "$final/placebo/`out'_`kind'_reshuffle_startmy_final.dta", replace

			}
		}

	restore

	* Delete the per-repetition datasets once they have been pooled
	foreach out in remain_ger_per {
		forvalues rep = 1/300 {

			erase "$final/placebo/`out'_`rep'_plot.dta"
			erase "$final/placebo/`out'_`rep'_post.dta"

		}
	}
		
********************************************************************************
*** Figure 3a
********************************************************************************
	
* 1. LOAD DATASET OF ESTIMATES 

	use "$final/placebo/remain_ger_per_plot_reshuffle_startmy_final.dta", clear 
	
* 2. GENERATE ID OF COEFFICIENT
*    n is the position of the coefficient within one repetition (idnum).
*    The current row order is recorded first and used as the within-group sort
*    key: a plain "bys idnum:" does not guarantee the order of rows that share
*    the same idnum, which could attach n to the wrong coefficient.
	
	tempvar roworder
	gen long `roworder' = _n
	bys idnum (`roworder'): gen n = _n 
	
	drop if parm == "_cons"			// drop the constant 
	

* 3. REDUCE TO ONE SET OF ESTIMATES AND CI, averaging on all repetitions (id of repetition is idnum)


foreach var in "estimate" "min95" "max95" {

bys n: egen m_`var' = mean(`var')

}

keep if idnum == 1 					// keep only one observation with the means


* 4. PLOTS
*    x positions 1-6 are the coefficient positions n, labelled with the months
*    relative to the event; the vertical line at 3.5 separates pre from post.

twoway (scatter m_estimate n, lcolor(navy) mcolor(navy)) (rcap m_min95 m_max95 n, lcolor(navy)), ///
		yline(0, lpattern(solid) lcolor(cranberry)) xline(3.5, lpattern(solid) lcolor(cranberry)) xlabel(1 "-3" 2 "-2" 3 "-1" 4 "1" 5 "2" 6 "3" , labsize(medsmall) nogrid) ytitle("Placebo coefficients", margin(medsmall)) xtitle("Months to event", margin(medsmall))  ///
					legend(off) yscale(r(-0.20(0.10)0.20)) ylabel(-0.20(0.10)0.20, labsize(medsmall) nogrid) ///
					graphregion(color(white)) plotregion(lcolor(black)) xsize(5.5) ysize(4)
					
			graph save "Graph" "$graph/fig3a_final.gph", replace
			graph export "$graph/fig3a_final.pdf", as(pdf) replace
			graph export "$graph/fig3a_final.eps", as(eps) replace
			
			
********************************************************************************
*** Figure 3b
********************************************************************************

* 1. LOAD DATASET OF ESTIMATES 

	use "$final/placebo/remain_ger_per_post_reshuffle_startmy_final.dta", clear 
	
* 2. GENERATE ID OF COEFFICIENT
*    n is the position of the row within one repetition (idnum). The current
*    row order is recorded first and used as the within-group sort key, because
*    a plain "bys idnum:" does not guarantee the order of rows sharing an idnum.
	
	tempvar roworder
	gen long `roworder' = _n
	bys idnum (`roworder'): gen n = _n 
	
	* Keep only the second row of each repetition.
	* NOTE(review): rows 1 and 3 are presumably the base level of the post
	* indicator and the constant - confirm against the parm column.
	drop if inlist(n,1,3)


* 3. SORT BY SIZE OF COEFFICIENT AND GIVE NUMBER
	
	sort estimate
	
	cap drop order
	gen order = _n 

* 4. PLOTS
*    Kernel density of the placebo coefficients across repetitions.
*    NOTE(review): the vertical line at 0.123 is hard-coded; presumably the
*    estimate from the actual (non-placebo) event dates - confirm.

kdensity estimate, graphregion(color(white)) plotregion(lcolor(black)) ///
    ylabel(none,nogrid) ytitle("") xtitle("Placebo coefficients", margin(medsmall)) ///
    xline(0.123, lpattern(solid) lcolor(cranberry)) ///
     xscale(r(-.15(0.05).15)) xlabel(-.15(0.05).15, nogextend nogrid)  ///
    lcolor(navy) xsize(5.5) ysize(4) ///
    title("") ///
    note("")
			
			graph save "Graph" "$graph/fig3b_final.gph", replace
			graph export "$graph/fig3b_final.pdf", as(pdf) replace
			graph export "$graph/fig3b_final.eps", as(eps) replace

